Load Data
# Read the cleaned Citi Bike trip data (path is relative to this document).
data <- read_csv("../citibike_clean/citibike_clean.csv")

# Keep the start/end location fields plus the overweight percentages, then
# derive one "location" view per row.
#
# Every derived column prefers the start-side value and falls back to the
# end-side value only when the start-side value is missing. Using the same
# direction for all six columns keeps neighborhood, coordinates, borough,
# zip code and overweight percentage describing the same side of the trip;
# mixing directions pairs one neighborhood's name and coordinates with
# another neighborhood's percentage.
overweight_data <- data |>
  select(
    start_station_latitude,
    start_station_longitude,
    start_uhf34_neighborhood,
    start_zipcode,
    end_station_latitude,
    end_station_longitude,
    end_uhf34_neighborhood,
    start_borough, end_borough,
    end_zipcode,
    start_percent_overweight,
    end_percent_overweight
  ) |>
  mutate(
    neighborhood = coalesce(start_uhf34_neighborhood, end_uhf34_neighborhood),
    lat = coalesce(start_station_latitude, end_station_latitude),
    long = coalesce(start_station_longitude, end_station_longitude),
    borough = coalesce(start_borough, end_borough),
    zipcode = coalesce(start_zipcode, end_zipcode),
    overweight = coalesce(start_percent_overweight, end_percent_overweight)
  ) |>
  # Drop repeated rows so each start/end combination appears once.
  distinct()

# Preview the first rows as a formatted table.
head(overweight_data) |>
  knitr::kable()
| start_station_latitude | start_station_longitude | start_uhf34_neighborhood | start_zipcode | end_station_latitude | end_station_longitude | end_uhf34_neighborhood | start_borough | end_borough | end_zipcode | start_percent_overweight | end_percent_overweight | neighborhood | lat | long | borough | zipcode | overweight |
|---:|---:|:---|---:|---:|---:|:---|:---|:---|---:|---:|---:|:---|---:|---:|:---|---:|---:|
| 40.73056 | -73.97398 | Union Square, Lower Manhattan | 10009 | 40.73222 | -73.98166 | Union Square, Lower Manhattan | Manhattan | Manhattan | 10009 | 40.5 | 40.5 | Union Square, Lower Manhattan | 40.73222 | -73.98166 | Manhattan | 10009 | 40.5 |
| 40.68292 | -73.99318 | Downtown Heights Slope | 11217 | 40.69308 | -73.97179 | Bedford Stuyvesant Crown Heights | Brooklyn | Brooklyn | 11238 | 50.8 | 62.9 | Bedford Stuyvesant Crown Heights | 40.69308 | -73.97179 | Brooklyn | 11217 | 50.8 |
| 40.78473 | -73.96962 | Upper West Side | 10024 | 40.76585 | -73.98691 | Chelsea Village | Manhattan | Manhattan | 10019 | 43.4 | 38.1 | Chelsea Village | 40.76585 | -73.98691 | Manhattan | 10024 | 43.4 |
| 40.74620 | -73.98856 | Chelsea Village | 10019 | 40.76030 | -73.99884 | Chelsea Village | Manhattan | Manhattan | 10018 | 38.1 | 38.1 | Chelsea Village | 40.76030 | -73.99884 | Manhattan | 10019 | 38.1 |
| 40.70201 | -73.92377 | Williamsburg Bushwick | 11237 | 40.70624 | -73.93387 | Williamsburg Bushwick | Brooklyn | Brooklyn | 11206 | 61.8 | 61.8 | Williamsburg Bushwick | 40.70624 | -73.93387 | Brooklyn | 11237 | 61.8 |
| 40.79127 | -73.96484 | Upper West Side | 10025 | 40.75020 | -73.99093 | Chelsea Village | Manhattan | Manhattan | 10001 | 43.4 | 38.1 | Chelsea Village | 40.75020 | -73.99093 | Manhattan | 10025 | 43.4 |
Summary Statistics
# One-row table describing the distribution of the overweight percentage
# across the rows of overweight_data; missing values are ignored by every
# statistic.
knitr::kable(
  summarize(
    overweight_data,
    mean = mean(overweight, na.rm = TRUE),
    min = min(overweight, na.rm = TRUE),
    max = max(overweight, na.rm = TRUE),
    median = median(overweight, na.rm = TRUE),
    std = sd(overweight, na.rm = TRUE)
  )
)
| mean | min | max | median | std |
|---:|---:|---:|---:|---:|
| 46.86309 | 36.5 | 71.2 | 41.1 | 10.86188 |
Percent by Zipcode
# Point plot of the overweight percentage for each zip code. Zip codes are
# placed along the x axis in descending order of overweight percentage
# (reorder() on the negated value); x labels are tilted to stay legible.
percent_obese <- ggplot(
  overweight_data,
  aes(x = reorder(zipcode, -overweight), y = overweight)
) +
  geom_point() +
  labs(
    title = "Percent of adults classified as overweight or obese, by area",
    x = "Location",
    y = "Percent"
  ) +
  # theme_minimal() must come before theme() so the axis-text tweak survives.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

print(percent_obese)

Percent by UHF34 Neighborhood
# Point plot of the mean overweight percentage for each UHF34 neighborhood,
# ordered along the x axis from highest to lowest.
percent_obese <- overweight_data |>
  group_by(neighborhood) |>
  # na.rm = TRUE: a single missing percentage would otherwise turn the whole
  # neighborhood's mean into NA and drop its point from the plot. This also
  # matches the NA handling used for the summary statistics.
  summarize(overweight = mean(overweight, na.rm = TRUE)) |>
  ggplot(aes(x = reorder(neighborhood, -overweight), y = overweight)) +
  geom_point() +
  theme_minimal() +
  # Tilt the long neighborhood names so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(
    title = "Percent of adults classified as overweight or obese, by area",
    x = "Location",
    y = "Percent"
  )

print(percent_obese)

# Create a leaflet map of the overweight percentage at each location.

# Select the columns the map needs and drop repeated rows, so each distinct
# point/value pair is drawn once instead of stacking identical markers.
map_data <- overweight_data |>
  select(lat, long, overweight) |>
  distinct()

# overweight is a continuous percentage, so use a numeric colour scale: the
# shade of blue is then proportional to the value. colorFactor() would treat
# every distinct percentage as an unrelated category. The palette is built
# once and shared by the markers and the legend.
overweight_pal <- colorNumeric("Blues", domain = map_data$overweight)

map <- leaflet(data = map_data) |>
  addTiles() |> # Add map tiles (you can use different tile providers)
  # Add color-coded circles based on overweight percentage
  addCircleMarkers(
    radius = 5, # Adjust the circle size as needed
    fillColor = ~overweight_pal(overweight),
    color = "black",
    fillOpacity = 0.7,
    popup = ~paste("Overweight Percentage:", overweight, "%"),
    label = ~paste("Overweight Percentage:", overweight, "%")
  ) |>
  # Legend so the fill colours can be read back as percentages.
  addLegend(
    position = "bottomright",
    pal = overweight_pal,
    values = ~overweight,
    title = "Overweight (%)"
  )

# Display the map
map